## Print the current time as a marker of when the run started.
print(Sys.time())

## Keep strings as character vectors when building data frames (already the
## default since R 4.0; set explicitly for older versions). Written as FALSE
## rather than F because F is an ordinary variable that can be reassigned.
options(stringsAsFactors = FALSE)

## suppressPackageStartupMessages(library(xts))
## suppressPackageStartupMessages(library(strucchange))
suppressPackageStartupMessages(library(parrot))
suppressPackageStartupMessages(library(tidyr))
## suppressPackageStartupMessages(library(igraph))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(xtable))
suppressPackageStartupMessages(library(zoo))

## Restore the saved workspace objects for the 2016 election tweet data.
## NOTE(review): the rest of this script relies on `all_docs` coming from
## this file -- confirm against the script that writes the .RData.
load("data/en_tweets_2016-01-01_to_2016-11-08_dimensions_election16_remove_news_docs_only_combined_replicate_psrm.RData")

## Unique account ids, overall and per account category. `%in%` never
## returns NA, so accounts with a missing category are excluded from the
## named categories and collected separately in `no_category_trolls`.
all_trolls <- unique(all_docs$userid)
right_trolls <- with(
    all_docs, unique(userid[account_category %in% "RightTroll"])
)
left_trolls <- with(
    all_docs, unique(userid[account_category %in% "LeftTroll"])
)
hashtag_trolls <- with(
    all_docs, unique(userid[account_category %in% "HashtagGamer"])
)
news_trolls <- with(
    all_docs, unique(userid[account_category %in% "NewsFeed"])
)
commercial_trolls <- with(
    all_docs, unique(userid[account_category %in% "Commercial"])
)
fearmonger_trolls <- with(
    all_docs, unique(userid[account_category %in% "Fearmonger"])
)
unknown_trolls <- with(
    all_docs, unique(userid[account_category %in% "Unknown"])
)
no_category_trolls <- with(
    all_docs, unique(userid[is.na(account_category)])
)

## Weekly aggregates of tweet scaling scores and tweet counts by account
## category. `tweet_date` is the start of the week each tweet falls in
## (floor_date with unit = "week"), so every summary below is a weekly
## figure, one row per week.
aggregated_tweets <- subset(
    all_docs,
    substr(tweet_time, 1, 10) >= "2016-01-01"
    ## not_holdout_alt includes tweets (news feed) not included in training
    & not_holdout_alt
) %>%
    mutate(
        ## start of the week containing the tweet
        tweet_date = lubridate::floor_date(
            as.Date(substr(tweet_time, 1, 10)),
            unit = "week"
        ),
        ## account category of each tweet's author, computed once and
        ## reused for both the weekly means and the weekly counts
        in_left = userid %in% left_trolls,
        in_right = userid %in% right_trolls,
        in_news = userid %in% news_trolls,
        in_hashtag = userid %in% hashtag_trolls,
        in_fearmonger = userid %in% fearmonger_trolls,
        in_commercial = userid %in% commercial_trolls,
        in_unknown = userid %in% unknown_trolls,
        in_no_category = userid %in% no_category_trolls
    ) %>%
    mutate(
        ## standardise over all retained tweets (before grouping by week);
        ## scale() returns a one-column matrix, as.numeric() turns it back
        ## into a plain vector. Only the dimensions summarised below are
        ## scaled.
        X2_scaled = as.numeric(scale(X2)),
        X1_scaled_left_orig = as.numeric(scale(X1_left)),
        X2_scaled_left_orig = as.numeric(scale(X2_left)),
        ## combine left troll 1st and 2nd dimensions (similar meaning),
        ## centred and scaled with the mean and sd of left troll tweets only
        X12_left = X1_left + X2_left,
        X1_scaled_left = as.numeric(scale(
            X12_left,
            center = mean(X12_left[in_left], na.rm = TRUE),
            scale = sd(X12_left[in_left], na.rm = TRUE)
        )),
        X1_scaled_right = as.numeric(scale(X1_right))
    ) %>%
    group_by(tweet_date) %>%
    summarise(
        ## ## ## weekly averages of tweet content by account category
        ## (a week with no tweets from a category yields NaN)
        ## ## left trolls only
        sorting2d = mean(X2_scaled[in_left], na.rm = TRUE),
        ## left trolls only trained on left trolls
        sorting1dl = mean(X1_scaled_left[in_left], na.rm = TRUE),
        sorting1dl_orig = mean(X1_scaled_left_orig[in_left], na.rm = TRUE),
        sorting2dl_orig = mean(X2_scaled_left_orig[in_left], na.rm = TRUE),
        ## ## right trolls only
        sorting2r = mean(X2_scaled[in_right], na.rm = TRUE),
        ## right trolls only trained on right trolls
        sorting1rr = mean(X1_scaled_right[in_right], na.rm = TRUE),
        ## ## other trolls
        sorting2o = mean(X2_scaled[in_news], na.rm = TRUE),
        sorting2g = mean(X2_scaled[in_hashtag], na.rm = TRUE),
        sorting2fear = mean(X2_scaled[in_fearmonger], na.rm = TRUE),
        sorting2commercial = mean(X2_scaled[in_commercial], na.rm = TRUE),
        sorting2unknown = mean(X2_scaled[in_unknown], na.rm = TRUE),
        sorting2nocategory = mean(X2_scaled[in_no_category], na.rm = TRUE),
        ## ## ## weekly counts of activity by account category
        left_tweets = sum(in_left),
        right_tweets = sum(in_right),
        news_tweets = sum(in_news),
        commercial_tweets = sum(in_commercial),
        no_category_tweets = sum(in_no_category),
        hashtag_tweets = sum(in_hashtag),
        unknown_tweets = sum(in_unknown),
        fearmonger_tweets = sum(in_fearmonger)
    )







#### text scaling plots


#### run the figure scripts in order; source() evaluates each one in the
#### global environment, local() only keeps the loop variables out of it
local({
    figure_scripts <- c(
        ## trained on left trolls, left trolls only
        ## (flipped sign - sign has no meaning with pca)
        "code/09a_plot_tweet_scales_over_time_figure_left_trolls.R",
        ## trained on right trolls, right trolls only
        "code/09b_plot_tweet_scales_over_time_figure_right_trolls.R",
        ## left and right trolls, trained on both
        "code/09c_plot_tweet_scales_over_time_figure_left_and_right_trolls.R",
        ## troll activity
        "code/09d_plot_tweet_scales_over_time_figure_troll_activity.R"
    )
    for (script_path in figure_scripts) {
        source(script_path)
    }
})
